# This file uses the raw OSM and GRIP roads data and prepares the shapefiles, 
# one for each country, that we will use in the matlab code. 

######### SETUP, LOAD PACKAGES #########
import geopandas as gpd 
import pandas as pd
import os
import numpy as np

DIR = '/Users/nicolegorton/Dropbox/OTN_LAC'

################ Compute average roads by road type and country
# load in each shapefile, add a column corresponding to the country
# generate some summary statistics and add a column with length to the 
# dataframe 
os.chdir(DIR+'/data/LAC/')
folders = os.listdir()

def num(s):
	try:
		float(s)
		return float(s)
	except:
		return None

def get_type(road):
	if 'motorway' in road or 'trunk' in road:
		return 1
	if 'primary' in road:
		return 2
	if 'secondary' in road:
		return 3
	if 'tertiary' in road:
		return 4
	else:
		return 5

def get_surface_type(s):
	if s=='paved' or s=='asphalt':
		return 1
	else:
		return 0

# list of countries in sample 
agg1 = pd.DataFrame()
for j in folders:
	print('Now processing: ' + j)
	if (len(j)==2):  
		
		# get the OSM folder for that country
		shp_folder = list(filter(lambda k: 'osm' in k, os.listdir(DIR+'/data/LAC/'+j)))
		
		# some countries have multiple files, we need to load them all 
		for s in range(len(shp_folder)):
			print(s)
			file_name = shp_folder[s].replace('_shp', '.shp')
			if s == 0:
				country_shp = gpd.read_file(DIR+'/data/LAC/'+j+'/'+shp_folder[s]+'/'+file_name)
			if s > 0:
				shp = gpd.read_file(DIR+'/data/LAC/'+j+'/'+shp_folder[s]+'/'+file_name)
				country_shp = country_shp.append(shp)
				print(country_shp)
		
		# get road type based on string 
		country_shp['ROADTYPE'] = country_shp['highway'].apply(get_type)
		
		# calculate distance 
		country_shp = country_shp.to_crs(epsg=3035) # change crs to compute distance 
		country_shp['distance'] = country_shp.length
		country_shp['lanes'] = country_shp['lanes'].apply(num)
		country_shp['lanes'] = country_shp['lanes']*country_shp['distance']
		
		# drop rows missing number of lanes 
		country_shp = country_shp[country_shp['lanes'].isnull()==False]
		agg = country_shp[['distance', 'lanes', 'ROADTYPE']].groupby('ROADTYPE').sum()
		agg['lanes'] = agg['lanes']/agg['distance'] # compute distance-weighted average number of lanes across road type 
		agg = agg[['lanes']]
		agg['country'] = j
		agg1 = agg.append(agg1)
		agg1.to_csv(DIR+'/data/LAC/lanes_by_road_type.csv')

################ PREPARE ROAD SHAPEFILES FOR EACH COUNTRY ####################
# load in each shapefile, add a column corresponding to the country
# generate some summary statistics and add a column with length to the dataframe 

# list of countries for which you want to make a road shapefile 
countries = ['Costa Rica', 'Brazil', 'Argentina', 'Colombia', 'Peru', 'Mexico', 'Chile', \
 'Panama', 'El Salvador', 'Bolivia', 'Ecuador', 'Paraguay', 'Uruguay', 'Nicaragua', 'Guatemala','Venezuela']

# There's a column with the country identifier (IS0 1 code)
country_codes1 = pd.read_csv(DIR + '/data/GRIP/country_to_iso1.csv')
country_codes1['Country'] = country_codes1['Country'].apply(lambda x: x.strip())

# Merge in ISO2 codes so we can get the folders
country_codes2  = pd.read_csv(DIR + '/data/LAC/country_icc_codes.csv')
country_codes = country_codes1.merge(country_codes2,
	left_on = 'Country', right_on='Name')

# load in GRIP shapefile (very large)
grip_shp = gpd.read_file(DIR+'/data/GRIP/grip_shp.shp')

# merge on country codes 
country_shp = pd.merge(grip_shp, country_codes, left_on = 'GP_RCY', right_on='ISO1')

# keep countries we want
country_shp = country_shp[country_shp['Country'].apply(lambda x: True if x.strip() in countries else False)]

# generate variables for missing 
country_shp['missing_primary'] = (country_shp['GP_RTP']==0)
country_shp['missing_paved'] = (country_shp['GP_RSE']==0)

# create variables we need to create weights 
country_shp['paved'] = country_shp['GP_RSE'].apply(lambda x: 1 if x == 1 else 0) 
country_shp['primary'] = country_shp['GP_RTP'].apply(lambda x: 1 if x == 1 or x == 2 else 0) 
country_shp['segment'] = 1

# adjusts coordinate projection to get distance 
country_shp = country_shp.to_crs(epsg=3035)

# calculate distance 
country_shp['distance'] = country_shp.length
country_shp['distance'] = country_shp['distance']/1000

# merge in number of lanes, created above from OSM data 
lanes = pd.read_csv(DIR+'/data/LAC/lanes_by_road_type.csv')

country_shp = country_shp.merge(lanes, how='left', 
	left_on = ['GP_RTP', 'Code'], right_on=['ROADTYPE', 'country'])

# if missing lanes from merge, assume single lane
country_shp['lanes'][country_shp['lanes'].isnull()==True] = 1
# country_shp['lanes'][country_shp['ROADTYPE']==5] = 1

# separate by country and save into the respective folder
for c in countries:
	print(c)
	temp = country_shp[country_shp['Country']==c]
	print(str(len(temp)))
	code = country_codes['Code'][country_codes['Name']==c].iloc[0]
	if c == 'Brazil':
		temp = temp[temp['GP_RTP']!=5]

	temp = temp.to_crs(epsg=4326) # adjust CRS so consistent with other files 
	temp.to_file(DIR+'/data/LAC/'+code+'/'+code+'_GRIP.shp')

## Prepare road network summary statistics based on GRIP data shapefiles 
os.chdir(DIR+'/data/LAC/')
folders = os.listdir()
total2 = pd.DataFrame(columns=['Country', 'lanes_dist', 'distance'])

for j in folders:
	if (len(j)==2):  # Excludes Brazil, don't use shapefiles for that one
		country_shp = gpd.read_file(DIR+'/data/LAC/'+ j+'/'+j+'_GRIP.shp')
		country_shp = country_shp.to_crs(epsg=3035)
		country_shp['distance'] = country_shp.length
		country_shp['distance'] = country_shp['distance']/1000
		country_shp['lanes_dist'] = country_shp['lanes'] * country_shp['distance']
		data = []
		data.append([j, sum(country_shp['lanes_dist'])/sum(country_shp['distance']), sum(country_shp['distance'])])
		df = pd.DataFrame(data, columns=['Country', 'lanes_dist', 'distance'])

		total2 = total2.append(df)
		total2.to_csv(DIR+'/data/LAC/road_network_stats.csv')


